Import

# Download the patient-level COVID-19 CSV published on the Tokyo
# stopcovid19 site and reduce it to the columns used downstream:
#   date       : `公表_年月日` (publication date)
#   pref       : `都道府県名` (prefecture name)
#   ageBracket : `患者_年代` (patient age bracket) as an ordered-by-age factor
#   gender     : `患者_性別` (patient gender) as a factor, levels in order of
#                first appearance
tokyo_df <- "https://stopcovid19.metro.tokyo.lg.jp/data/130001_tokyo_covid19_patients.csv" %>%
  readr::read_csv() %>%
  dplyr::mutate(
    ageBracket = as.factor(`患者_年代`) %>%
      # Merge the "-" placeholder into the "不明" (unknown) category.
      forcats::fct_collapse(`不明` = c("-", "不明")) %>%
      # Order the brackets from youngest to oldest, with "不明" last.
      # The level named here must match the one produced by fct_collapse()
      # above; naming a level that does not exist only triggers an
      # "Unknown levels" warning and has no effect on the ordering.
      forcats::fct_relevel("10歳未満", "10代", "20代", "30代",
                           "40代", "50代", "60代", "70代",
                           "80代", "90代", "100歳以上",
                           "不明"),
    gender = forcats::as_factor(`患者_性別`)
  ) %>%
  dplyr::select(date = `公表_年月日`, pref = `都道府県名`, ageBracket, gender)

# Print the imported table for inspection.
tokyo_df

Data wrangling

 

日別

# Daily case counts for Tokyo, with one row per calendar day.
# `lagdiff`, `ma7` and `ma28` are helpers defined outside this section.
tokyo_daily <- tokyo_df %>%
  # Number of rows (reported cases) per publication date.
  dplyr::count(date, name = "n") %>%
  # Insert the calendar days that have no rows at all, with a count of zero,
  # so that the series has no gaps between the first and last date.
  tidyr::complete(
    date = seq.Date(from = min(date), to = max(date), by = "day"),
    fill = list(n = 0L)
  ) %>%
  dplyr::mutate(
    # Difference from the previous day
    diff = lagdiff(n),
    # Cumulative total
    cum = cumsum(n),
    # 7-day moving average
    ma7 = ma7(n),
    # 28-day moving average
    ma28 = ma28(n)
  )

# Print the daily table for inspection.
tokyo_daily

 

年代別

# Aggregate the patient table by date and age bracket.
# `daily_aggregate` is a helper defined outside this section; it receives the
# data frame first, followed by the `date` and `ageBracket` columns.
tokyo_ageBracket_daily <- daily_aggregate(tokyo_df, date, ageBracket)

# Print the aggregated table for inspection.
tokyo_ageBracket_daily

 

Visualize

# Show only the rows for the most recent date in the age-bracket table.
dplyr::filter(tokyo_ageBracket_daily, date == max(date))